# Libraries to help with reading and manipulating data
import numpy as np
import pandas as pd
# Libraries to help with data visualization
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to every plot drawn below
sns.set()
# Removes the limit for the number of displayed columns
pd.set_option("display.max_columns", None)
# Sets the limit for the number of displayed rows
pd.set_option("display.max_rows", 200)
# Libraries for fitting and scoring the regression models
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS
# Load the used-phone dataset from the working directory and preview the first rows
df = pd.read_csv("used_phone_data.csv")
df.head()
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Honor | Android | 23.97 | yes | no | 13.0 | 5.0 | 64.0 | 3.0 | 3020.0 | 146.0 | 2020 | 127 | 111.62 | 86.96 |
| 1 | Honor | Android | 28.10 | yes | yes | 13.0 | 16.0 | 128.0 | 8.0 | 4300.0 | 213.0 | 2020 | 325 | 249.39 | 161.49 |
| 2 | Honor | Android | 24.29 | yes | yes | 13.0 | 8.0 | 128.0 | 8.0 | 4200.0 | 213.0 | 2020 | 162 | 359.47 | 268.55 |
| 3 | Honor | Android | 26.04 | yes | yes | 13.0 | 8.0 | 64.0 | 6.0 | 7250.0 | 480.0 | 2020 | 345 | 278.93 | 180.23 |
| 4 | Honor | Android | 15.72 | yes | no | 13.0 | 8.0 | 64.0 | 3.0 | 5000.0 | 185.0 | 2020 | 293 | 140.87 | 103.80 |
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns
df.describe()
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3571.000000 | 3391.000000 | 3569.000000 | 3561.000000 | 3561.000000 | 3565.000000 | 3564.000000 | 3571.000000 | 3571.000000 | 3571.000000 | 3571.000000 |
| mean | 14.803892 | 9.400454 | 6.547352 | 54.532607 | 4.056962 | 3067.225666 | 179.424285 | 2015.964996 | 675.391487 | 237.389037 | 109.880277 |
| std | 5.153092 | 4.818396 | 6.879359 | 84.696246 | 1.391844 | 1364.206665 | 90.280856 | 2.291784 | 248.640972 | 197.545581 | 121.501226 |
| min | 2.700000 | 0.080000 | 0.300000 | 0.005000 | 0.030000 | 80.000000 | 23.000000 | 2013.000000 | 91.000000 | 9.130000 | 2.510000 |
| 25% | 12.700000 | 5.000000 | 2.000000 | 16.000000 | 4.000000 | 2100.000000 | 140.000000 | 2014.000000 | 536.000000 | 120.130000 | 45.205000 |
| 50% | 13.490000 | 8.000000 | 5.000000 | 32.000000 | 4.000000 | 3000.000000 | 159.000000 | 2016.000000 | 690.000000 | 189.800000 | 75.530000 |
| 75% | 16.510000 | 13.000000 | 8.000000 | 64.000000 | 4.000000 | 4000.000000 | 184.000000 | 2018.000000 | 872.000000 | 291.935000 | 126.000000 |
| max | 46.360000 | 48.000000 | 32.000000 | 1024.000000 | 16.000000 | 12000.000000 | 950.000000 | 2020.000000 | 1094.000000 | 2560.200000 | 1916.540000 |
# Report the number of rows (shape[0]) and columns (shape[1]) of the raw data
print(f"The Shape of the dataframe is {df.shape[0]} rows and {df.shape[1]} columns.") # f-string
The Shape of the dataframe is 3571 rows and 15 columns.
# Column dtypes, non-null counts and memory usage of the raw data
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3571 entries, 0 to 3570 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 brand_name 3571 non-null object 1 os 3571 non-null object 2 screen_size 3571 non-null float64 3 4g 3571 non-null object 4 5g 3571 non-null object 5 main_camera_mp 3391 non-null float64 6 selfie_camera_mp 3569 non-null float64 7 int_memory 3561 non-null float64 8 ram 3561 non-null float64 9 battery 3565 non-null float64 10 weight 3564 non-null float64 11 release_year 3571 non-null int64 12 days_used 3571 non-null int64 13 new_price 3571 non-null float64 14 used_price 3571 non-null float64 dtypes: float64(9), int64(2), object(4) memory usage: 418.6+ KB
import sys
# Install pandas-profiling with the pip of the interpreter that runs this notebook.
# The leading "!" is IPython shell syntax, so this line only works inside a notebook/IPython session.
!{sys.executable} -m pip install pandas-profiling
Requirement already satisfied: pandas-profiling in c:\users\bruns\anaconda3\lib\site-packages (3.0.0) Requirement already satisfied: htmlmin>=0.1.12 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (0.1.12) Requirement already satisfied: seaborn>=0.10.1 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (0.11.1) Requirement already satisfied: scipy>=1.4.1 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (1.6.1) Requirement already satisfied: pydantic>=1.8.1 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (1.8.2) Requirement already satisfied: pandas!=1.0.0,!=1.0.1,!=1.0.2,!=1.1.0,>=0.25.3 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (1.2.4) Requirement already satisfied: matplotlib>=3.2.0 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (3.3.4) Requirement already satisfied: joblib in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (1.0.1) Requirement already satisfied: jinja2>=2.11.1 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (2.11.3) Requirement already satisfied: visions[type_image_path]==0.7.1 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (0.7.1) Requirement already satisfied: missingno>=0.4.2 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (0.5.0) Requirement already satisfied: numpy>=1.16.0 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (1.20.1) Requirement already satisfied: tangled-up-in-unicode==0.1.0 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (0.1.0) Requirement already satisfied: PyYAML>=5.0.0 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (5.4.1) Requirement already satisfied: phik>=0.11.1 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (0.12.0) Requirement already satisfied: tqdm>=4.48.2 in c:\users\bruns\anaconda3\lib\site-packages (from 
pandas-profiling) (4.59.0) Requirement already satisfied: requests>=2.24.0 in c:\users\bruns\anaconda3\lib\site-packages (from pandas-profiling) (2.25.1) Requirement already satisfied: bottleneck in c:\users\bruns\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.1->pandas-profiling) (1.3.2) Requirement already satisfied: attrs>=19.3.0 in c:\users\bruns\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.1->pandas-profiling) (20.3.0) Requirement already satisfied: multimethod==1.4 in c:\users\bruns\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.1->pandas-profiling) (1.4) Requirement already satisfied: networkx>=2.4 in c:\users\bruns\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.1->pandas-profiling) (2.5) Requirement already satisfied: imagehash in c:\users\bruns\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.1->pandas-profiling) (4.2.1) Requirement already satisfied: Pillow in c:\users\bruns\anaconda3\lib\site-packages (from visions[type_image_path]==0.7.1->pandas-profiling) (8.2.0) Requirement already satisfied: MarkupSafe>=0.23 in c:\users\bruns\anaconda3\lib\site-packages (from jinja2>=2.11.1->pandas-profiling) (1.1.1) Requirement already satisfied: cycler>=0.10 in c:\users\bruns\anaconda3\lib\site-packages (from matplotlib>=3.2.0->pandas-profiling) (0.10.0) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\users\bruns\anaconda3\lib\site-packages (from matplotlib>=3.2.0->pandas-profiling) (2.4.7) Requirement already satisfied: python-dateutil>=2.1 in c:\users\bruns\anaconda3\lib\site-packages (from matplotlib>=3.2.0->pandas-profiling) (2.8.1) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\bruns\anaconda3\lib\site-packages (from matplotlib>=3.2.0->pandas-profiling) (1.3.1) Requirement already satisfied: six in c:\users\bruns\anaconda3\lib\site-packages (from cycler>=0.10->matplotlib>=3.2.0->pandas-profiling) (1.15.0) Requirement already 
satisfied: decorator>=4.3.0 in c:\users\bruns\anaconda3\lib\site-packages (from networkx>=2.4->visions[type_image_path]==0.7.1->pandas-profiling) (5.0.6) Requirement already satisfied: pytz>=2017.3 in c:\users\bruns\anaconda3\lib\site-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,!=1.1.0,>=0.25.3->pandas-profiling) (2021.1) Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\users\bruns\anaconda3\lib\site-packages (from pydantic>=1.8.1->pandas-profiling) (3.7.4.3) Requirement already satisfied: certifi>=2017.4.17 in c:\users\bruns\anaconda3\lib\site-packages (from requests>=2.24.0->pandas-profiling) (2020.12.5) Requirement already satisfied: chardet<5,>=3.0.2 in c:\users\bruns\anaconda3\lib\site-packages (from requests>=2.24.0->pandas-profiling) (4.0.0) Requirement already satisfied: idna<3,>=2.5 in c:\users\bruns\anaconda3\lib\site-packages (from requests>=2.24.0->pandas-profiling) (2.10) Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\bruns\anaconda3\lib\site-packages (from requests>=2.24.0->pandas-profiling) (1.26.4) Requirement already satisfied: PyWavelets in c:\users\bruns\anaconda3\lib\site-packages (from imagehash->visions[type_image_path]==0.7.1->pandas-profiling) (1.1.1)
import pandas_profiling

# Build the automated exploratory report for the raw data and display it
profile_report = pandas_profiling.ProfileReport(df)
profile_report
# Store the four text-valued columns as pandas categoricals, then re-check the dtypes
for categorical_column in ("brand_name", "os", "4g", "5g"):
    df[categorical_column] = df[categorical_column].astype("category")
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3571 entries, 0 to 3570 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 brand_name 3571 non-null category 1 os 3571 non-null category 2 screen_size 3571 non-null float64 3 4g 3571 non-null category 4 5g 3571 non-null category 5 main_camera_mp 3391 non-null float64 6 selfie_camera_mp 3569 non-null float64 7 int_memory 3561 non-null float64 8 ram 3561 non-null float64 9 battery 3565 non-null float64 10 weight 3564 non-null float64 11 release_year 3571 non-null int64 12 days_used 3571 non-null int64 13 new_price 3571 non-null float64 14 used_price 3571 non-null float64 dtypes: category(4), float64(9), int64(2) memory usage: 322.7 KB
# Pairwise scatter plots (histograms on the diagonal) of the numeric columns
sns.pairplot(df)
<seaborn.axisgrid.PairGrid at 0x2088603de80>
# Pair plot with kernel-density curves on the diagonal.
# The frame has 15 columns, so the 0:16 positional slice selects all of them.
df_attr = df.iloc[:, :16]
sns.pairplot(data=df_attr, diag_kind="kde")
<seaborn.axisgrid.PairGrid at 0x2089a1227f0>
# Used price per operating system, split further by brand
fig, ax = plt.subplots(figsize=(15, 15))
sns.boxplot(x="os", y="used_price", hue="brand_name", data=df, ax=ax)
plt.show()

# Used price per operating system alone
# NOTE(review): if a matching box plot of new_price was intended here, add it explicitly
fig, ax = plt.subplots(figsize=(15, 15))
sns.boxplot(x="os", y="used_price", data=df, ax=ax)
plt.show()
The new and used prices both appear to contain a significant number of outliers.
# Plot the distribution of every column, one figure per column
for column in df.columns:
    plt.figure(figsize=(7, 4))
    sns.histplot(data=df, x=column)
    plt.show()
# Scatter plot of used price against every other column, one figure per column.
# The target itself is skipped: plotting used_price against used_price carries no information.
for feature in df.columns.drop("used_price"):
    plt.figure(figsize=(6, 6))
    sns.scatterplot(data=df, x=feature, y="used_price")
    plt.show()
# Flag used-price outliers: rows lying more than 4 * IQR away from the median
used_price_col = df['used_price']
quartiles = np.quantile(used_price_col[used_price_col.notnull()], [.25, .75])
power_4iqr = (quartiles[1] - quartiles[0]) * 4
print(f'Q1 = {quartiles[0]}, Q3 = {quartiles[1]}, 4*IQR = {power_4iqr}')
is_outlier = (used_price_col - used_price_col.median()).abs() > power_4iqr
outlier_used_price = df.loc[is_outlier, 'used_price']
outlier_used_price
Q1 = 45.205, Q3 = 126.0, 4*IQR = 323.18
35 1049.97 37 413.14 45 1185.73 47 418.28 80 565.13 81 524.45 85 715.20 111 901.27 122 559.03 123 490.04 124 400.43 125 568.56 137 571.38 141 585.46 148 503.47 161 470.69 165 472.89 175 510.57 176 417.14 204 1248.99 210 448.34 216 533.34 260 1049.06 262 413.81 270 850.85 271 464.79 272 414.68 305 655.75 306 524.51 310 550.27 336 894.04 347 553.77 348 489.18 349 553.59 350 400.00 362 655.93 366 450.34 377 611.78 378 518.80 379 663.74 382 822.39 387 460.58 401 749.84 402 600.80 404 551.54 666 550.40 667 439.68 668 625.06 669 460.16 674 399.51 677 725.96 680 450.35 683 749.79 684 435.14 697 424.88 740 474.64 853 400.30 974 450.12 1044 583.07 1086 459.84 1131 479.52 1132 425.16 1208 423.64 1306 440.32 1307 443.76 1313 1150.41 1325 1039.60 1326 400.40 1341 520.10 1354 479.72 1696 617.09 1697 404.68 1699 401.07 1710 422.99 2110 431.78 2225 540.90 2445 726.73 2448 875.62 2454 453.34 2759 418.29 2779 650.33 2785 449.43 3032 400.24 3305 475.70 3317 550.33 3320 1916.54 3325 476.44 3326 597.26 3334 427.50 3337 412.08 3366 699.64 3367 613.87 3387 698.61 3394 407.09 3418 436.77 3420 535.92 3448 448.26 3460 548.87 3463 1916.34 3468 505.86 3469 597.14 3477 490.54 3478 540.01 3480 451.07 3509 702.85 3510 566.32 3511 439.92 3536 667.92 3537 789.54 3538 533.07 3539 495.89 3540 506.96 3541 453.12 3542 597.10 3552 765.02 3553 598.02 3562 592.09 3563 535.75 3564 401.39 3565 756.99 3566 490.96 Name: used_price, dtype: float64
# Flag new-price outliers: rows lying more than 4 * IQR away from the median
new_price_col = df['new_price']
quartiles = np.quantile(new_price_col[new_price_col.notnull()], [.25, .75])
power_4iqr = (quartiles[1] - quartiles[0]) * 4
print(f'Q1 = {quartiles[0]}, Q3 = {quartiles[1]}, 4*IQR = {power_4iqr}')
is_outlier = (new_price_col - new_price_col.median()).abs() > power_4iqr
outlier_new_price = df.loc[is_outlier, 'new_price']
outlier_new_price
Q1 = 120.13, Q3 = 291.935, 4*IQR = 687.22
35 1399.9900 45 1699.3600 85 1100.1200 111 1200.8500 137 880.8000 141 900.8700 204 2498.2400 260 1398.4700 270 1701.5400 310 1100.3100 336 1197.4900 362 879.7600 366 900.5800 377 880.7100 379 1020.0400 382 1100.6200 401 999.5800 407 879.7200 666 1100.8600 667 879.1000 668 1250.3200 669 1150.3200 674 999.0000 677 1451.9200 680 900.6300 683 1499.0700 684 1449.8200 740 949.2700 853 1000.4900 974 899.9500 1044 899.4100 1086 1149.6400 1131 959.0800 1306 880.8300 1313 2300.8700 1325 1600.2900 1327 879.9300 1341 1300.2400 1354 1199.6300 1381 921.5200 1696 951.5200 2225 1352.4200 2445 1050.7600 2448 1751.1800 2779 999.7400 2785 898.4000 3320 2560.2000 3326 917.1500 3366 934.9915 3387 934.1500 3463 2560.2000 3469 917.1500 3509 934.9915 3536 890.8000 3537 1054.4505 3542 918.0000 3552 1529.9915 3565 1163.6500 Name: new_price, dtype: float64
Removing the used-price outliers from the DataFrame
# Remove every row flagged as a used-price outlier, modifying df in place
# NOTE(review): outlier_new_price is computed earlier but never dropped — confirm this is intended
df.drop(index=outlier_used_price.index, inplace=True)
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3450 entries, 0 to 3570 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 brand_name 3450 non-null category 1 os 3450 non-null category 2 screen_size 3450 non-null float64 3 4g 3450 non-null category 4 5g 3450 non-null category 5 main_camera_mp 3296 non-null float64 6 selfie_camera_mp 3450 non-null float64 7 int_memory 3440 non-null float64 8 ram 3440 non-null float64 9 battery 3444 non-null float64 10 weight 3443 non-null float64 11 release_year 3450 non-null int64 12 days_used 3450 non-null int64 13 new_price 3450 non-null float64 14 used_price 3450 non-null float64 dtypes: category(4), float64(9), int64(2) memory usage: 338.7 KB
# Display the data with new_price rounded to 4 decimals.
# The rounded frame is not assigned back, so df itself is left unchanged.
df.round({'new_price': 4})
| brand_name | os | screen_size | 4g | 5g | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Honor | Android | 23.97 | yes | no | 13.0 | 5.0 | 64.0 | 3.0 | 3020.0 | 146.0 | 2020 | 127 | 111.6200 | 86.96 |
| 1 | Honor | Android | 28.10 | yes | yes | 13.0 | 16.0 | 128.0 | 8.0 | 4300.0 | 213.0 | 2020 | 325 | 249.3900 | 161.49 |
| 2 | Honor | Android | 24.29 | yes | yes | 13.0 | 8.0 | 128.0 | 8.0 | 4200.0 | 213.0 | 2020 | 162 | 359.4700 | 268.55 |
| 3 | Honor | Android | 26.04 | yes | yes | 13.0 | 8.0 | 64.0 | 6.0 | 7250.0 | 480.0 | 2020 | 345 | 278.9300 | 180.23 |
| 4 | Honor | Android | 15.72 | yes | no | 13.0 | 8.0 | 64.0 | 3.0 | 5000.0 | 185.0 | 2020 | 293 | 140.8700 | 103.80 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3561 | Apple | iOS | 11.27 | yes | no | 12.0 | 7.0 | 64.0 | 3.0 | 1821.0 | 148.0 | 2020 | 235 | 356.1500 | 263.75 |
| 3567 | Asus | Android | 15.24 | yes | no | 13.0 | 8.0 | 128.0 | 8.0 | 4000.0 | 200.0 | 2018 | 541 | 518.8145 | 259.30 |
| 3568 | Alcatel | Android | 18.73 | yes | no | 13.0 | 5.0 | 32.0 | 3.0 | 4000.0 | 165.0 | 2020 | 201 | 92.6500 | 69.81 |
| 3569 | Alcatel | Android | 18.73 | yes | no | 13.0 | 5.0 | 32.0 | 2.0 | 4000.0 | 160.0 | 2020 | 149 | 101.9150 | 76.07 |
| 3570 | Alcatel | Android | 13.49 | yes | no | 13.0 | 5.0 | 16.0 | 2.0 | 4000.0 | 168.0 | 2020 | 176 | 72.2415 | 50.48 |
3450 rows × 15 columns
# Re-check dtypes and non-null counts after the outlier rows were dropped
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3450 entries, 0 to 3570 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 brand_name 3450 non-null category 1 os 3450 non-null category 2 screen_size 3450 non-null float64 3 4g 3450 non-null category 4 5g 3450 non-null category 5 main_camera_mp 3296 non-null float64 6 selfie_camera_mp 3450 non-null float64 7 int_memory 3440 non-null float64 8 ram 3440 non-null float64 9 battery 3444 non-null float64 10 weight 3443 non-null float64 11 release_year 3450 non-null int64 12 days_used 3450 non-null int64 13 new_price 3450 non-null float64 14 used_price 3450 non-null float64 dtypes: category(4), float64(9), int64(2) memory usage: 467.7 KB
# Count the missing values per column, most affected columns first
missing_counts = df.isna().sum()
missing_counts.sort_values(ascending=False)
main_camera_mp 154 int_memory 10 ram 10 weight 7 battery 6 brand_name 0 os 0 screen_size 0 4g 0 5g 0 selfie_camera_mp 0 release_year 0 days_used 0 new_price 0 used_price 0 dtype: int64
# Median of each numeric column (the values used below to impute missing data).
# numeric_only=True skips the categorical columns explicitly; pandas >= 2.0 no longer
# drops them silently and raises a TypeError instead.
df.median(numeric_only=True)
screen_size 13.490 main_camera_mp 8.000 selfie_camera_mp 5.000 int_memory 32.000 ram 4.000 battery 3000.000 weight 158.000 release_year 2015.000 days_used 700.000 new_price 181.115 used_price 74.790 dtype: float64
# Fill missing numeric values with the column median, in place.
# numeric_only=True keeps this working on pandas >= 2.0, where the median of a frame
# that still contains categorical columns raises a TypeError.
# NOTE(review): the medians are computed on the full data set before the train/test split.
df.fillna(df.median(numeric_only=True), inplace=True)
# Verify that no missing values remain
df.isnull().sum().sort_values(ascending=False)
brand_name 0 os 0 screen_size 0 4g 0 5g 0 main_camera_mp 0 selfie_camera_mp 0 int_memory 0 ram 0 battery 0 weight 0 release_year 0 days_used 0 new_price 0 used_price 0 dtype: int64
# Pair plot of the cleaned data with kernel-density curves on the diagonal.
# The frame has 15 columns, so the 0:16 positional slice selects all of them.
df_attr = df.iloc[:, :16]
sns.pairplot(data=df_attr, diag_kind="kde")
<seaborn.axisgrid.PairGrid at 0x2088b401760>
# Confirm that every column is fully populated after imputation
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3450 entries, 0 to 3570 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 brand_name 3450 non-null category 1 os 3450 non-null category 2 screen_size 3450 non-null float64 3 4g 3450 non-null category 4 5g 3450 non-null category 5 main_camera_mp 3450 non-null float64 6 selfie_camera_mp 3450 non-null float64 7 int_memory 3450 non-null float64 8 ram 3450 non-null float64 9 battery 3450 non-null float64 10 weight 3450 non-null float64 11 release_year 3450 non-null int64 12 days_used 3450 non-null int64 13 new_price 3450 non-null float64 14 used_price 3450 non-null float64 dtypes: category(4), float64(9), int64(2) memory usage: 467.7 KB
# Summary statistics of the numeric columns after outlier removal and imputation
df.describe()
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 | 3450.000000 |
| mean | 14.621275 | 9.291287 | 6.256174 | 49.539309 | 3.962272 | 3023.475652 | 177.982304 | 2015.857391 | 687.591594 | 212.942016 | 93.062655 |
| std | 5.030298 | 4.706446 | 6.654136 | 74.744151 | 1.222456 | 1338.596984 | 89.235951 | 2.246987 | 242.317747 | 131.553900 | 69.530048 |
| min | 2.700000 | 0.080000 | 0.300000 | 0.005000 | 0.030000 | 80.000000 | 25.000000 | 2013.000000 | 91.000000 | 9.130000 | 2.510000 |
| 25% | 11.430000 | 5.000000 | 2.000000 | 16.000000 | 4.000000 | 2070.000000 | 140.000000 | 2014.000000 | 554.000000 | 119.560000 | 44.940000 |
| 50% | 13.490000 | 8.000000 | 5.000000 | 32.000000 | 4.000000 | 3000.000000 | 158.000000 | 2015.000000 | 700.000000 | 181.115000 | 74.790000 |
| 75% | 16.030000 | 13.000000 | 8.000000 | 64.000000 | 4.000000 | 4000.000000 | 181.000000 | 2018.000000 | 878.000000 | 279.315000 | 120.040000 |
| max | 46.360000 | 48.000000 | 32.000000 | 1024.000000 | 16.000000 | 9600.000000 | 950.000000 | 2020.000000 | 1094.000000 | 921.520000 | 398.560000 |
# Correlation heatmap of the numeric columns.
# The categorical columns are excluded explicitly: pandas >= 2.0 no longer drops
# non-numeric columns inside DataFrame.corr() and raises an error instead.
plt.figure(figsize=(15, 7))
numeric_corr = df.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr, annot=True, vmin=-1, vmax=1, fmt=".2f", cmap="Spectral")
plt.show()
# Number of phones per brand, most frequent brand first
df["brand_name"].value_counts()
Others 503 Samsung 347 Huawei 240 LG 199 Lenovo 172 ZTE 139 Xiaomi 130 Alcatel 125 Asus 122 Oppo 121 Nokia 120 Micromax 120 Honor 116 Vivo 112 HTC 109 Motorola 108 Sony 85 Meizu 60 Gionee 55 Acer 51 XOLO 49 Panasonic 47 Apple 43 Realme 41 Celkon 37 Lava 36 Karbonn 30 Spice 30 Microsoft 22 Coolpad 22 BlackBerry 22 OnePlus 14 Google 13 Infinix 10 Name: brand_name, dtype: int64
# Mean of every numeric column per brand.
# numeric_only=True skips the remaining categorical columns (os, 4g, 5g), which make
# GroupBy.mean() raise a TypeError on pandas >= 2.0; observed=False pins the current
# grouping behaviour for categorical keys.
df.groupby(['brand_name'], observed=False).mean(numeric_only=True)
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| brand_name | |||||||||||
| Acer | 15.245490 | 6.967647 | 2.474510 | 23.215686 | 3.901961 | 2999.607843 | 225.872549 | 2014.372549 | 795.921569 | 172.867647 | 63.798431 |
| Alcatel | 13.128480 | 6.324800 | 2.260800 | 36.224000 | 3.426000 | 2393.360000 | 166.465600 | 2014.976000 | 752.632000 | 142.499092 | 55.326080 |
| Apple | 14.987907 | 9.837209 | 4.293023 | 34.604651 | 3.906977 | 3365.116279 | 194.002326 | 2016.069767 | 710.906977 | 495.803488 | 200.189767 |
| Asus | 15.453770 | 9.927459 | 4.337705 | 49.442623 | 4.000000 | 3368.778689 | 216.072951 | 2015.311475 | 757.081967 | 228.343725 | 89.766311 |
| BlackBerry | 12.410909 | 9.909091 | 5.140909 | 57.454545 | 3.829545 | 2822.500000 | 160.568182 | 2015.181818 | 692.909091 | 234.774545 | 102.039091 |
| Celkon | 9.554054 | 3.597297 | 0.635135 | 219.675676 | 1.466216 | 1459.459459 | 135.202703 | 2013.243243 | 801.405405 | 60.956486 | 22.648378 |
| Coolpad | 14.040000 | 10.818182 | 7.181818 | 36.363636 | 3.954545 | 2945.000000 | 154.381818 | 2016.363636 | 766.227273 | 194.285455 | 77.928182 |
| Gionee | 13.617091 | 9.857273 | 5.109091 | 42.181818 | 3.931818 | 3047.363636 | 160.085455 | 2014.909091 | 784.945455 | 239.120000 | 85.191818 |
| Google | 15.130000 | 11.892308 | 7.538462 | 56.615385 | 4.615385 | 3698.461538 | 191.076923 | 2017.923077 | 529.923077 | 408.600000 | 206.422308 |
| HTC | 12.945505 | 10.772477 | 5.697248 | 32.733945 | 4.000000 | 2606.330275 | 154.946789 | 2015.146789 | 760.238532 | 246.174679 | 96.653394 |
| Honor | 18.955690 | 12.327586 | 11.782759 | 71.517241 | 4.534483 | 3629.396552 | 177.272414 | 2018.129310 | 470.310345 | 229.032509 | 126.601552 |
| Huawei | 16.585625 | 10.181667 | 9.662500 | 62.866667 | 4.430458 | 3486.637500 | 196.465417 | 2016.975000 | 581.004167 | 240.924533 | 119.535083 |
| Infinix | 18.478000 | 8.000000 | 11.200000 | 38.400000 | 2.600000 | 4800.000000 | 184.400000 | 2019.600000 | 313.300000 | 106.017000 | 68.979000 |
| Karbonn | 11.683667 | 6.796667 | 1.823333 | 65.600333 | 3.375000 | 1730.000000 | 137.933333 | 2013.366667 | 820.066667 | 110.384333 | 38.069333 |
| LG | 13.535075 | 8.455528 | 4.602513 | 29.266332 | 3.736181 | 2830.050251 | 163.501759 | 2015.693467 | 691.266332 | 205.807452 | 89.054271 |
| Lava | 12.465278 | 6.909722 | 2.419444 | 104.000000 | 3.277778 | 2248.611111 | 140.408333 | 2014.527778 | 787.222222 | 107.424722 | 40.112778 |
| Lenovo | 16.110000 | 8.946221 | 5.158140 | 42.232558 | 3.886628 | 3653.255814 | 227.841279 | 2015.610465 | 724.767442 | 193.358779 | 77.147500 |
| Meizu | 15.188833 | 12.393333 | 10.316667 | 56.266667 | 4.333333 | 3312.083333 | 158.850000 | 2017.083333 | 672.266667 | 258.551833 | 116.346833 |
| Micromax | 12.176417 | 5.973750 | 2.271667 | 46.286667 | 3.750000 | 2225.416667 | 145.315000 | 2014.300000 | 816.508333 | 106.964667 | 38.134417 |
| Microsoft | 13.010000 | 9.454545 | 3.181818 | 31.272727 | 4.000000 | 2410.454545 | 172.795455 | 2014.863636 | 853.636364 | 166.366818 | 55.316818 |
| Motorola | 14.111389 | 12.148148 | 7.685185 | 48.444444 | 3.796296 | 3170.879630 | 164.042593 | 2017.055556 | 563.296296 | 191.120287 | 93.066759 |
| Nokia | 11.588250 | 5.581333 | 5.100000 | 44.601167 | 3.631250 | 2137.250000 | 136.257500 | 2016.308333 | 624.066667 | 126.524975 | 56.290500 |
| OnePlus | 18.460000 | 10.214286 | 13.285714 | 131.428571 | 5.142857 | 3456.785714 | 174.642857 | 2017.142857 | 642.500000 | 449.824857 | 213.767857 |
| Oppo | 15.485207 | 10.160744 | 14.328926 | 75.239669 | 4.727273 | 3335.702479 | 164.411157 | 2017.165289 | 569.388430 | 304.861686 | 153.208512 |
| Others | 13.854473 | 7.882167 | 3.874155 | 39.864940 | 3.732167 | 2796.182903 | 190.031412 | 2015.121272 | 745.011928 | 183.708203 | 73.482565 |
| Panasonic | 12.996170 | 10.324468 | 4.453191 | 27.914894 | 4.000000 | 2682.978723 | 149.103191 | 2015.574468 | 810.468085 | 171.426596 | 60.302979 |
| Realme | 17.655610 | 8.609756 | 11.365854 | 70.634146 | 4.195122 | 4337.317073 | 188.804878 | 2019.463415 | 313.390244 | 186.031415 | 124.169024 |
| Samsung | 14.302305 | 9.092075 | 5.546398 | 37.244957 | 3.974063 | 3145.351585 | 192.958501 | 2015.515850 | 722.847262 | 262.694536 | 108.180865 |
| Sony | 13.885765 | 14.258824 | 5.231765 | 30.870588 | 4.023529 | 2918.411765 | 177.504706 | 2015.211765 | 743.035294 | 257.420253 | 102.960118 |
| Spice | 11.746000 | 4.921667 | 1.573333 | 49.600000 | 3.750000 | 2191.000000 | 161.130000 | 2013.233333 | 881.233333 | 98.000000 | 32.299667 |
| Vivo | 18.663839 | 12.379464 | 14.056250 | 81.892857 | 4.611607 | 3687.276786 | 171.541518 | 2017.803571 | 512.098214 | 306.192366 | 155.356696 |
| XOLO | 12.482041 | 7.455102 | 1.718367 | 22.204082 | 4.000000 | 2220.000000 | 152.016327 | 2013.653061 | 823.244898 | 130.898571 | 47.340204 |
| Xiaomi | 18.828923 | 11.657692 | 11.776154 | 73.169231 | 4.461538 | 3919.076923 | 182.635385 | 2017.615385 | 548.192308 | 231.294904 | 121.526231 |
| ZTE | 13.818201 | 11.364029 | 6.263309 | 43.741007 | 3.965827 | 2846.582734 | 155.595324 | 2015.856115 | 732.640288 | 209.717266 | 87.906115 |
# Mean of every numeric column per operating system.
# numeric_only=True skips the remaining categorical columns (brand_name, 4g, 5g), which
# make GroupBy.mean() raise a TypeError on pandas >= 2.0; observed=False pins the current
# grouping behaviour for categorical keys.
df.groupby(['os'], observed=False).mean(numeric_only=True)
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| os | |||||||||||
| Android | 15.142748 | 9.640051 | 6.553868 | 49.219487 | 4.028494 | 3141.343840 | 182.612830 | 2015.941102 | 683.105062 | 217.486410 | 95.773989 |
| Others | 7.092500 | 3.828700 | 3.291500 | 64.521025 | 2.916700 | 1409.785000 | 109.077750 | 2015.085000 | 700.945000 | 88.638058 | 37.493250 |
| Windows | 12.578657 | 8.877612 | 2.334328 | 29.134328 | 4.000000 | 2182.089552 | 160.856716 | 2014.149254 | 837.507463 | 191.430448 | 65.083433 |
| iOS | 14.732381 | 9.880952 | 4.366667 | 34.666667 | 3.928571 | 3235.071429 | 187.121429 | 2016.000000 | 720.380952 | 499.325952 | 199.543333 |
# Full summary statistics of the numeric columns, computed separately per operating system
df.groupby(['os']).describe()
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | |
| os | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Android | 3141.0 | 15.142748 | 4.693865 | 2.86 | 12.70 | 13.49 | 16.67 | 46.36 | 3141.0 | 9.640051 | 4.512623 | 0.30 | 5.0 | 8.0 | 13.0 | 48.0 | 3141.0 | 6.553868 | 6.817000 | 0.3 | 2.0 | 5.0 | 8.0 | 32.0 | 3141.0 | 49.219487 | 74.238612 | 0.010 | 16.0 | 32.0 | 64.0 | 1024.0 | 3141.0 | 4.028494 | 1.062303 | 0.25 | 4.00 | 4.0 | 4.0 | 12.0 | 3141.0 | 3141.343840 | 1266.632676 | 225.0 | 2200.0 | 3000.0 | 4000.00 | 9600.0 | 3141.0 | 182.612830 | 87.669245 | 46.0 | 143.000 | 160.0 | 183.600 | 950.0 | 3141.0 | 2015.941102 | 2.242333 | 2013.0 | 2014.0 | 2016.0 | 2018.0 | 2020.0 | 3141.0 | 683.105062 | 243.842462 | 91.0 | 548.00 | 696.0 | 875.00 | 1094.0 | 3141.0 | 217.486410 | 125.212456 | 30.81 | 121.9200 | 189.81 | 279.7800 | 921.52 | 3141.0 | 95.773989 | 68.522442 | 9.30 | 47.9400 | 75.580 | 120.8400 | 398.56 |
| Others | 200.0 | 7.092500 | 3.877846 | 2.70 | 5.40 | 5.72 | 8.41 | 31.27 | 200.0 | 3.828700 | 3.927516 | 0.08 | 0.3 | 2.0 | 5.0 | 13.0 | 200.0 | 3.291500 | 3.892020 | 0.3 | 0.3 | 2.0 | 5.0 | 20.0 | 200.0 | 64.521025 | 96.757882 | 0.005 | 16.0 | 32.0 | 64.0 | 512.0 | 200.0 | 2.916700 | 2.627060 | 0.03 | 0.25 | 4.0 | 4.0 | 16.0 | 200.0 | 1409.785000 | 980.947772 | 80.0 | 820.0 | 1200.0 | 1830.00 | 8827.0 | 200.0 | 109.077750 | 60.185698 | 25.0 | 77.375 | 98.1 | 136.250 | 544.3 | 200.0 | 2015.085000 | 2.344195 | 2013.0 | 2013.0 | 2014.0 | 2017.0 | 2020.0 | 200.0 | 700.945000 | 233.046854 | 117.0 | 568.25 | 703.5 | 883.25 | 1094.0 | 200.0 | 88.638058 | 87.656717 | 9.13 | 21.6075 | 54.97 | 122.9750 | 430.09 | 200.0 | 37.493250 | 41.217811 | 2.51 | 9.5350 | 20.555 | 47.5275 | 227.34 |
| Windows | 67.0 | 12.578657 | 3.610521 | 10.16 | 10.16 | 11.27 | 12.70 | 27.94 | 67.0 | 8.877612 | 6.489101 | 2.00 | 5.0 | 6.7 | 8.0 | 41.0 | 67.0 | 2.334328 | 1.996156 | 0.3 | 1.0 | 1.6 | 5.0 | 8.0 | 67.0 | 29.134328 | 7.841026 | 16.000 | 32.0 | 32.0 | 32.0 | 64.0 | 67.0 | 4.000000 | 0.000000 | 4.00 | 4.00 | 4.0 | 4.0 | 4.0 | 67.0 | 2182.089552 | 613.774431 | 1200.0 | 1800.0 | 2000.0 | 2500.00 | 4150.0 | 67.0 | 160.856716 | 88.400727 | 98.9 | 134.000 | 146.0 | 159.500 | 675.9 | 67.0 | 2014.149254 | 0.821192 | 2013.0 | 2014.0 | 2014.0 | 2015.0 | 2016.0 | 67.0 | 837.507463 | 147.439805 | 564.0 | 706.50 | 824.0 | 953.50 | 1089.0 | 67.0 | 191.430448 | 121.202627 | 59.72 | 99.4850 | 159.92 | 269.2350 | 699.46 | 67.0 | 65.083433 | 42.204799 | 17.82 | 32.6150 | 55.300 | 91.4500 | 279.97 |
| iOS | 42.0 | 14.732381 | 6.566622 | 3.33 | 11.27 | 13.49 | 19.21 | 27.94 | 42.0 | 9.880952 | 2.370576 | 5.00 | 8.0 | 12.0 | 12.0 | 12.0 | 42.0 | 4.366667 | 2.698840 | 1.2 | 1.2 | 6.0 | 7.0 | 7.0 | 42.0 | 34.666667 | 17.990060 | 16.000 | 16.0 | 32.0 | 32.0 | 64.0 | 42.0 | 3.928571 | 0.341650 | 2.00 | 4.00 | 4.0 | 4.0 | 4.0 | 42.0 | 3235.071429 | 2759.796930 | 205.0 | 334.0 | 2720.5 | 5400.75 | 8827.0 | 42.0 | 187.121429 | 160.348397 | 25.0 | 43.375 | 140.5 | 300.125 | 469.0 | 42.0 | 2016.000000 | 1.912730 | 2013.0 | 2014.0 | 2016.0 | 2017.0 | 2020.0 | 42.0 | 720.380952 | 216.616662 | 235.0 | 592.75 | 702.0 | 868.50 | 1082.0 | 42.0 | 499.325952 | 180.177720 | 247.70 | 357.1200 | 450.07 | 689.1125 | 879.72 | 42.0 | 199.543333 | 86.340442 | 81.21 | 124.4025 | 193.590 | 275.5525 | 375.23 |
# One-hot encode the categorical columns into 0/1 indicator columns.
# dtype is pinned to uint8 (the pre-2.0 default) because pandas >= 2.0 would otherwise
# emit boolean columns, which changes the 0/1 values the modelling steps rely on.
# NOTE(review): every level is kept (no drop_first), so each group of indicators sums to 1
# and is collinear with an intercept term — confirm this is handled before fitting OLS.
df = pd.get_dummies(df, columns=['brand_name', 'os', '4g', '5g'], dtype=np.uint8)
df.head()
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | used_price | brand_name_Acer | brand_name_Alcatel | brand_name_Apple | brand_name_Asus | brand_name_BlackBerry | brand_name_Celkon | brand_name_Coolpad | brand_name_Gionee | brand_name_Google | brand_name_HTC | brand_name_Honor | brand_name_Huawei | brand_name_Infinix | brand_name_Karbonn | brand_name_LG | brand_name_Lava | brand_name_Lenovo | brand_name_Meizu | brand_name_Micromax | brand_name_Microsoft | brand_name_Motorola | brand_name_Nokia | brand_name_OnePlus | brand_name_Oppo | brand_name_Others | brand_name_Panasonic | brand_name_Realme | brand_name_Samsung | brand_name_Sony | brand_name_Spice | brand_name_Vivo | brand_name_XOLO | brand_name_Xiaomi | brand_name_ZTE | os_Android | os_Others | os_Windows | os_iOS | 4g_no | 4g_yes | 5g_no | 5g_yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 23.97 | 13.0 | 5.0 | 64.0 | 3.0 | 3020.0 | 146.0 | 2020 | 127 | 111.62 | 86.96 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
| 1 | 28.10 | 13.0 | 16.0 | 128.0 | 8.0 | 4300.0 | 213.0 | 2020 | 325 | 249.39 | 161.49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 2 | 24.29 | 13.0 | 8.0 | 128.0 | 8.0 | 4200.0 | 213.0 | 2020 | 162 | 359.47 | 268.55 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 3 | 26.04 | 13.0 | 8.0 | 64.0 | 6.0 | 7250.0 | 480.0 | 2020 | 345 | 278.93 | 180.23 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 4 | 15.72 | 13.0 | 8.0 | 64.0 | 3.0 | 5000.0 | 185.0 | 2020 | 293 | 140.87 | 103.80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Target is the resale price; every remaining column is used as a predictor.
y = df["used_price"]
X = df.drop(columns=["used_price"])

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=1,
)

# Summary statistics of the training predictors.
X_train.describe()
| screen_size | main_camera_mp | selfie_camera_mp | int_memory | ram | battery | weight | release_year | days_used | new_price | brand_name_Acer | brand_name_Alcatel | brand_name_Apple | brand_name_Asus | brand_name_BlackBerry | brand_name_Celkon | brand_name_Coolpad | brand_name_Gionee | brand_name_Google | brand_name_HTC | brand_name_Honor | brand_name_Huawei | brand_name_Infinix | brand_name_Karbonn | brand_name_LG | brand_name_Lava | brand_name_Lenovo | brand_name_Meizu | brand_name_Micromax | brand_name_Microsoft | brand_name_Motorola | brand_name_Nokia | brand_name_OnePlus | brand_name_Oppo | brand_name_Others | brand_name_Panasonic | brand_name_Realme | brand_name_Samsung | brand_name_Sony | brand_name_Spice | brand_name_Vivo | brand_name_XOLO | brand_name_Xiaomi | brand_name_ZTE | os_Android | os_Others | os_Windows | os_iOS | 4g_no | 4g_yes | 5g_no | 5g_yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 | 2415.000000 |
| mean | 14.739665 | 9.426969 | 6.435735 | 49.265863 | 3.975400 | 3044.078675 | 178.561387 | 2015.919669 | 682.623188 | 215.519618 | 0.014907 | 0.037681 | 0.013665 | 0.034783 | 0.006625 | 0.008282 | 0.007039 | 0.013665 | 0.002070 | 0.030228 | 0.032298 | 0.072050 | 0.002899 | 0.009524 | 0.057143 | 0.009524 | 0.050932 | 0.016977 | 0.034369 | 0.005383 | 0.033126 | 0.035197 | 0.004969 | 0.038509 | 0.136232 | 0.015321 | 0.012836 | 0.102277 | 0.024845 | 0.009110 | 0.035611 | 0.016149 | 0.034783 | 0.040994 | 0.912629 | 0.056315 | 0.017391 | 0.013665 | 0.338302 | 0.661698 | 0.972257 | 0.027743 |
| std | 5.035455 | 4.677233 | 6.689711 | 73.301312 | 1.240719 | 1315.422892 | 88.403798 | 2.252350 | 242.781953 | 132.420552 | 0.121205 | 0.190463 | 0.116118 | 0.183267 | 0.081142 | 0.090644 | 0.083622 | 0.116118 | 0.045464 | 0.171249 | 0.176827 | 0.258624 | 0.053771 | 0.097144 | 0.232163 | 0.097144 | 0.219904 | 0.129213 | 0.182212 | 0.073186 | 0.179003 | 0.184315 | 0.070330 | 0.192462 | 0.343106 | 0.122851 | 0.112592 | 0.303076 | 0.155684 | 0.095029 | 0.185356 | 0.126075 | 0.183267 | 0.198317 | 0.282436 | 0.230576 | 0.130751 | 0.116118 | 0.473230 | 0.473230 | 0.164270 | 0.164270 |
| min | 2.860000 | 0.080000 | 0.300000 | 0.005000 | 0.030000 | 80.000000 | 25.000000 | 2013.000000 | 92.000000 | 9.130000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 12.700000 | 5.000000 | 2.000000 | 16.000000 | 4.000000 | 2100.000000 | 140.000000 | 2014.000000 | 547.000000 | 120.030000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
| 50% | 13.490000 | 8.000000 | 5.000000 | 32.000000 | 4.000000 | 3000.000000 | 159.000000 | 2015.000000 | 696.000000 | 183.320000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 0.000000 |
| 75% | 16.030000 | 13.000000 | 8.000000 | 64.000000 | 4.000000 | 4000.000000 | 182.000000 | 2018.000000 | 874.000000 | 280.195000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 |
| max | 46.360000 | 48.000000 | 32.000000 | 1024.000000 | 16.000000 | 9600.000000 | 950.000000 | 2020.000000 | 1094.000000 | 921.520000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
# initialize the linear regression model and fit it on the training data
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)
LinearRegression()
Get the score on training set
# R-squared of the fitted model on the data it was trained on
train_score = regression_model.score(X_train, y_train)
print("The score (R-squared) on the training set is ", train_score)
The score (R-squared) on the training set is 0.9452839434052618
def r_squared(model, X, y):
    """Compute the error sums of squares and R-squared of ``model`` on ``(X, y)``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Predictors, passed unchanged to ``model.predict``.
    y : array-like with a ``mean()`` method (e.g. pandas Series, NumPy array)
        Observed target values.

    Returns
    -------
    tuple
        ``(SSE, SST, r_square)``: the sum of squared residuals, the total sum
        of squares about the mean of ``y``, and ``1 - SSE / SST``.

    Notes
    -----
    ``SST`` is zero when ``y`` is constant, in which case the ratio (and
    therefore ``r_square``) is undefined.
    """
    y_mean = y.mean()
    # Total variation of the observations around their mean.
    SST = ((y - y_mean) ** 2).sum()
    # Variation left unexplained by the model's predictions.
    residuals = y - model.predict(X)
    SSE = (residuals ** 2).sum()
    r_square = 1 - SSE / SST
    return SSE, SST, r_square
# Recompute R-squared by hand on the training data and show each component.
SSE, SST, r_square = r_squared(regression_model, X_train, y_train)
for label, value in (("SSE: ", SSE), ("SST: ", SST), ("R-squared: ", r_square)):
    print(label, value)
SSE: 649120.7141178057 SST: 11863441.090530433 R-squared: 0.9452839434052618
Get the score on test set
# R-squared of the fitted model on the held-out test data
test_score = regression_model.score(X_test, y_test)
print("The score (R-squared) on the test set is ", test_score)
The score (R-squared) on the test set is 0.9367482592294135
Get the RMSE on train set
# Root of the mean squared residual on the data the model was trained on
train_predictions = regression_model.predict(X_train)
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
print(
    "The Root Mean Square Error (RMSE) of the model for the training set is ",
    train_rmse,
)
The Root Mean Square Error (RMSE) of the model for the training set is 16.394726139321644
Get the RMSE on test set
# Root of the mean squared residual on the held-out test data
test_predictions = regression_model.predict(X_test)
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))
print(
    "The Root Mean Square Error (RMSE) of the model for the test set is ",
    test_rmse,
)
The Root Mean Square Error (RMSE) of the model for the test set is 17.119211146538838
Get the model coefficients
# Tabulate the fitted coefficients, one row per predictor, with the
# intercept appended as the final row.
coef_values = [*regression_model.coef_, regression_model.intercept_]
coef_labels = [*X_train.columns, "Intercept"]
coef_df = pd.DataFrame({"Coefficients": coef_values}, index=coef_labels)
coef_df
| Coefficients | |
|---|---|
| screen_size | 0.053543 |
| main_camera_mp | -0.805988 |
| selfie_camera_mp | 0.613928 |
| int_memory | 0.035250 |
| ram | 2.804793 |
| battery | -0.000369 |
| weight | -0.007774 |
| release_year | 0.847579 |
| days_used | -0.095390 |
| new_price | 0.421928 |
| brand_name_Acer | 0.176839 |
| brand_name_Alcatel | 0.114099 |
| brand_name_Apple | -3.611314 |
| brand_name_Asus | 0.086774 |
| brand_name_BlackBerry | 5.661039 |
| brand_name_Celkon | 4.932113 |
| brand_name_Coolpad | -0.895105 |
| brand_name_Gionee | -6.186673 |
| brand_name_Google | 38.919759 |
| brand_name_HTC | -0.472049 |
| brand_name_Honor | -4.761710 |
| brand_name_Huawei | -2.382855 |
| brand_name_Infinix | -16.731424 |
| brand_name_Karbonn | 2.400784 |
| brand_name_LG | 1.391148 |
| brand_name_Lava | 1.182560 |
| brand_name_Lenovo | -3.950064 |
| brand_name_Meizu | -1.038285 |
| brand_name_Micromax | 1.549807 |
| brand_name_Microsoft | -0.705211 |
| brand_name_Motorola | -2.171833 |
| brand_name_Nokia | -8.527877 |
| brand_name_OnePlus | 6.782538 |
| brand_name_Oppo | -5.523624 |
| brand_name_Others | -0.101932 |
| brand_name_Panasonic | -1.468594 |
| brand_name_Realme | -8.524781 |
| brand_name_Samsung | -1.486388 |
| brand_name_Sony | 2.117931 |
| brand_name_Spice | 6.432004 |
| brand_name_Vivo | -3.842789 |
| brand_name_XOLO | 1.990760 |
| brand_name_Xiaomi | -2.427485 |
| brand_name_ZTE | 1.071837 |
| os_Android | 1.118320 |
| os_Others | -0.915887 |
| os_Windows | 3.408881 |
| os_iOS | -3.611314 |
| 4g_no | 1.854243 |
| 4g_yes | -1.854243 |
| 5g_no | -23.191041 |
| 5g_yes | 23.191041 |
| Intercept | -1624.738093 |
Automate the equation of the fit
# Let us write the equation of linear regression
Equation = "Price = " + str(regression_model.intercept_)
# One "+ ( coefficient )*( feature )" term per fitted predictor, in column order.
terms = [
    "+ ( " + str(coef) + " )*( " + str(name) + " )"
    for coef, name in zip(regression_model.coef_, X_train.columns)
]
print(Equation, *terms)
Price = -1624.7380934470625 + ( 0.053543257233632964 )*( screen_size ) + ( -0.8059880670283929 )*( main_camera_mp ) + ( 0.6139278221200691 )*( selfie_camera_mp ) + ( 0.035250008079695194 )*( int_memory ) + ( 2.8047927240222434 )*( ram ) + ( -0.00036886250308776347 )*( battery ) + ( -0.007773780146768772 )*( weight ) + ( 0.8475793211476059 )*( release_year ) + ( -0.09539020733014647 )*( days_used ) + ( 0.42192819655928737 )*( new_price ) + ( 0.17683870408045266 )*( brand_name_Acer ) + ( 0.11409920144222552 )*( brand_name_Alcatel ) + ( -3.611314225928028 )*( brand_name_Apple ) + ( 0.08677380329715056 )*( brand_name_Asus ) + ( 5.661038962081446 )*( brand_name_BlackBerry ) + ( 4.932112901893202 )*( brand_name_Celkon ) + ( -0.8951047730223278 )*( brand_name_Coolpad ) + ( -6.186673023060855 )*( brand_name_Gionee ) + ( 38.91975923037877 )*( brand_name_Google ) + ( -0.4720491164182672 )*( brand_name_HTC ) + ( -4.7617104438507285 )*( brand_name_Honor ) + ( -2.3828550916040747 )*( brand_name_Huawei ) + ( -16.73142429585952 )*( brand_name_Infinix ) + ( 2.400784090794901 )*( brand_name_Karbonn ) + ( 1.3911476829752747 )*( brand_name_LG ) + ( 1.1825602038011374 )*( brand_name_Lava ) + ( -3.9500638709963978 )*( brand_name_Lenovo ) + ( -1.0382852126359072 )*( brand_name_Meizu ) + ( 1.549807013143882 )*( brand_name_Micromax ) + ( -0.7052107172917577 )*( brand_name_Microsoft ) + ( -2.1718328954287993 )*( brand_name_Motorola ) + ( -8.527877119398536 )*( brand_name_Nokia ) + ( 6.782538413228437 )*( brand_name_OnePlus ) + ( -5.523623518801358 )*( brand_name_Oppo ) + ( -0.10193153821370382 )*( brand_name_Others ) + ( -1.4685938547421555 )*( brand_name_Panasonic ) + ( -8.524781297587573 )*( brand_name_Realme ) + ( -1.4863879512396525 )*( brand_name_Samsung ) + ( 2.1179308231072813 )*( brand_name_Sony ) + ( 6.4320040767325235 )*( brand_name_Spice ) + ( -3.842788929304269 )*( brand_name_Vivo ) + ( 1.9907604163679307 )*( brand_name_XOLO ) + ( -2.4274846546746947 )*( brand_name_Xiaomi ) + ( 
1.0718370067356204 )*( brand_name_ZTE ) + ( 1.1183201070051179 )*( os_Android ) + ( -0.915886693708293 )*( os_Others ) + ( 3.4088808126179195 )*( os_Windows ) + ( -3.6113142259284348 )*( os_iOS ) + ( 1.854242784269469 )*( 4g_no ) + ( -1.8542427842662619 )*( 4g_yes ) + ( -23.19104135493962 )*( 5g_no ) + ( 23.191041354939664 )*( 5g_yes )
import statsmodels.api as sm
# statsmodels' OLS fits no intercept unless the design matrix carries one,
# so prepend an explicit constant column to the predictors.
# NOTE(review): X seems to keep every dummy level (e.g. both 4g_no and
# 4g_yes); together with this constant that makes the design matrix
# rank-deficient, so individual coefficients are not uniquely determined.
# Consider drop_first=True where the dummies are created -- confirm.
X_con = sm.add_constant(X, prepend=True)

# Same 70:30 split and seed as used for the scikit-learn model.
split_settings = {"test_size": 0.30, "random_state": 1}
X_trainc, X_testc, y_trainc, y_testc = train_test_split(X_con, y, **split_settings)
Make the linear model using statsmodels OLS and print the model summary.
# Specify the ordinary-least-squares model on the training split, fit it,
# and display the full regression summary.
ols_spec = sm.OLS(endog=y_trainc, exog=X_trainc)
model = ols_spec.fit()
model.summary()
| Dep. Variable: | used_price | R-squared: | 0.945 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.944 |
| Method: | Least Squares | F-statistic: | 870.1 |
| Date: | Wed, 29 Sep 2021 | Prob (F-statistic): | 0.00 |
| Time: | 23:56:28 | Log-Likelihood: | -10181. |
| No. Observations: | 2415 | AIC: | 2.046e+04 |
| Df Residuals: | 2367 | BIC: | 2.074e+04 |
| Df Model: | 47 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | -704.0532 | 273.335 | -2.576 | 0.010 | -1240.054 | -168.052 |
| screen_size | 0.0535 | 0.120 | 0.445 | 0.657 | -0.183 | 0.290 |
| main_camera_mp | -0.8060 | 0.104 | -7.776 | 0.000 | -1.009 | -0.603 |
| selfie_camera_mp | 0.6139 | 0.086 | 7.142 | 0.000 | 0.445 | 0.782 |
| int_memory | 0.0353 | 0.005 | 6.743 | 0.000 | 0.025 | 0.046 |
| ram | 2.8048 | 0.346 | 8.108 | 0.000 | 2.126 | 3.483 |
| battery | -0.0004 | 0.001 | -0.717 | 0.474 | -0.001 | 0.001 |
| weight | -0.0078 | 0.007 | -1.067 | 0.286 | -0.022 | 0.007 |
| release_year | 0.8476 | 0.313 | 2.711 | 0.007 | 0.234 | 1.461 |
| days_used | -0.0954 | 0.002 | -43.717 | 0.000 | -0.100 | -0.091 |
| new_price | 0.4219 | 0.004 | 110.565 | 0.000 | 0.414 | 0.429 |
| brand_name_Acer | -17.8758 | 7.471 | -2.393 | 0.017 | -32.527 | -3.225 |
| brand_name_Alcatel | -17.9385 | 7.230 | -2.481 | 0.013 | -32.116 | -3.761 |
| brand_name_Apple | -111.9272 | 42.197 | -2.653 | 0.008 | -194.674 | -29.181 |
| brand_name_Asus | -17.9659 | 7.216 | -2.490 | 0.013 | -32.116 | -3.816 |
| brand_name_BlackBerry | -12.3916 | 7.843 | -1.580 | 0.114 | -27.772 | 2.989 |
| brand_name_Celkon | -13.1205 | 7.682 | -1.708 | 0.088 | -28.184 | 1.943 |
| brand_name_Coolpad | -18.9478 | 8.099 | -2.340 | 0.019 | -34.829 | -3.067 |
| brand_name_Gionee | -24.2393 | 7.470 | -3.245 | 0.001 | -38.889 | -9.590 |
| brand_name_Google | 20.8671 | 10.344 | 2.017 | 0.044 | 0.583 | 41.152 |
| brand_name_HTC | -18.5247 | 7.177 | -2.581 | 0.010 | -32.598 | -4.452 |
| brand_name_Honor | -22.8144 | 7.344 | -3.107 | 0.002 | -37.216 | -8.413 |
| brand_name_Huawei | -20.4355 | 7.200 | -2.838 | 0.005 | -34.555 | -6.316 |
| brand_name_Infinix | -34.7841 | 9.428 | -3.690 | 0.000 | -53.272 | -16.297 |
| brand_name_Karbonn | -15.6519 | 7.651 | -2.046 | 0.041 | -30.656 | -0.648 |
| brand_name_LG | -16.6615 | 7.141 | -2.333 | 0.020 | -30.665 | -2.658 |
| brand_name_Lava | -16.8701 | 7.854 | -2.148 | 0.032 | -32.272 | -1.468 |
| brand_name_Lenovo | -22.0027 | 7.170 | -3.069 | 0.002 | -36.062 | -7.943 |
| brand_name_Meizu | -19.0909 | 7.595 | -2.514 | 0.012 | -33.985 | -4.197 |
| brand_name_Micromax | -16.5028 | 7.250 | -2.276 | 0.023 | -30.719 | -2.287 |
| brand_name_Microsoft | -18.7579 | 9.167 | -2.046 | 0.041 | -36.733 | -0.782 |
| brand_name_Motorola | -20.2245 | 7.360 | -2.748 | 0.006 | -34.657 | -5.792 |
| brand_name_Nokia | -26.5805 | 7.439 | -3.573 | 0.000 | -41.169 | -11.992 |
| brand_name_OnePlus | -11.2701 | 8.551 | -1.318 | 0.188 | -28.039 | 5.499 |
| brand_name_Oppo | -23.5763 | 7.205 | -3.272 | 0.001 | -37.705 | -9.447 |
| brand_name_Others | -18.1546 | 7.117 | -2.551 | 0.011 | -32.110 | -4.199 |
| brand_name_Panasonic | -19.5212 | 7.588 | -2.573 | 0.010 | -34.401 | -4.641 |
| brand_name_Realme | -26.5774 | 7.831 | -3.394 | 0.001 | -41.935 | -11.220 |
| brand_name_Samsung | -19.5390 | 7.072 | -2.763 | 0.006 | -33.407 | -5.671 |
| brand_name_Sony | -15.9347 | 7.209 | -2.210 | 0.027 | -30.072 | -1.797 |
| brand_name_Spice | -11.6206 | 7.714 | -1.506 | 0.132 | -26.747 | 3.506 |
| brand_name_Vivo | -21.8954 | 7.248 | -3.021 | 0.003 | -36.108 | -7.683 |
| brand_name_XOLO | -16.0619 | 7.400 | -2.171 | 0.030 | -30.572 | -1.552 |
| brand_name_Xiaomi | -20.4801 | 7.224 | -2.835 | 0.005 | -34.646 | -6.315 |
| brand_name_ZTE | -16.9808 | 7.182 | -2.364 | 0.018 | -31.065 | -2.896 |
| os_Android | -197.4608 | 77.072 | -2.562 | 0.010 | -348.597 | -46.324 |
| os_Others | -199.4950 | 77.258 | -2.582 | 0.010 | -350.995 | -47.995 |
| os_Windows | -195.1702 | 76.895 | -2.538 | 0.011 | -345.959 | -44.381 |
| os_iOS | -111.9272 | 42.197 | -2.653 | 0.008 | -194.674 | -29.181 |
| 4g_no | -350.1723 | 136.481 | -2.566 | 0.010 | -617.807 | -82.538 |
| 4g_yes | -353.8808 | 136.856 | -2.586 | 0.010 | -622.251 | -85.511 |
| 5g_no | -375.2176 | 136.587 | -2.747 | 0.006 | -643.060 | -107.375 |
| 5g_yes | -328.8355 | 136.759 | -2.404 | 0.016 | -597.016 | -60.655 |
| Omnibus: | 464.933 | Durbin-Watson: | 1.979 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 4106.540 |
| Skew: | 0.650 | Prob(JB): | 0.00 |
| Kurtosis: | 9.255 | Cond. No. | 1.04e+16 |
Get the value of the coefficient of determination.
# R-squared as reported by the fitted statsmodels results object
ols_r_squared = model.rsquared
print("The coefficient of determination (R-squared) is ", ols_r_squared)
The coefficient of determination (R-squared) is 0.9452839434052618
Automate the equation of fit
# Let us write the equation of linear regression.
# Coefficient names are taken from the fitted model itself (model.params is
# labelled with the columns used in the fit) instead of from X_trainc, so the
# printed names stay matched to their coefficients even if X_trainc is
# reassigned after fitting. Label/position-safe accessors (.iloc, .items())
# replace the deprecated integer indexing on a labelled Series.
# Assumes the model was fit on a DataFrame, so model.params is a pandas Series.
Equation = "Price ="
intercept = model.params.iloc[0]  # sm.add_constant puts the constant first
terms = [
    f"( {coef} )*( {name} )" for name, coef in model.params.iloc[1:].items()
]
print(Equation, intercept, "+", " + ".join(terms))
Price = -704.0531737377777 + ( 0.053543257239236475 )*( selfie_camera_mp ) + ( -0.8059880670267164 )*( int_memory ) + ( 0.6139278221304648 )*( ram ) + ( 0.03525000807973884 )*( release_year ) + ( 2.804792724014244 )*( days_used ) + ( -0.00036886250306190026 )*( new_price ) + ( -0.007773780147057652 )*( used_price ) + ( 0.8475793210455903 )*( brand_name_Acer ) + ( -0.09539020733050882 )*( brand_name_Alcatel ) + ( 0.4219281965590277 )*( brand_name_Apple ) + ( -17.87580677639521 )*( brand_name_Asus ) + ( -17.938546279024962 )*( brand_name_BlackBerry ) + ( -111.92718710861979 )*( brand_name_Celkon ) + ( -17.965871677170913 )*( brand_name_Coolpad ) + ( -12.391606518485354 )*( brand_name_Gionee ) + ( -13.12053257869044 )*( brand_name_HTC ) + ( -18.947750253461447 )*( brand_name_Honor ) + ( -24.23931850354949 )*( brand_name_Huawei ) + ( 20.867113750086492 )*( brand_name_Infinix ) + ( -18.52469459690521 )*( brand_name_Karbonn ) + ( -22.814355924283312 )*( brand_name_LG ) + ( -20.435500572040812 )*( brand_name_Lava ) + ( -34.784069776286586 )*( brand_name_Lenovo ) + ( -15.651861389722239 )*( brand_name_Meizu ) + ( -16.66149779749055 )*( brand_name_Micromax ) + ( -16.870085276632167 )*( brand_name_Microsoft ) + ( -22.002709351459313 )*( brand_name_Motorola ) + ( -19.090930693058638 )*( brand_name_Nokia ) + ( -16.502838467313005 )*( brand_name_Oppo ) + ( -18.757856197639022 )*( brand_name_Others ) + ( -20.224478375858393 )*( brand_name_Panasonic ) + ( -26.580522599802514 )*( brand_name_Realme ) + ( -11.270107067227393 )*( brand_name_Samsung ) + ( -23.576268999273246 )*( brand_name_Sony ) + ( -18.154577018661403 )*( brand_name_Vivo ) + ( -19.521239335173487 )*( brand_name_Xiaomi ) + ( -26.57742677796724 )*( brand_name_ZTE ) + ( -19.53903343170927 )*( os_Android ) + ( -15.934714657404236 )*( os_Others ) + ( -11.620641403789392 )*( os_Windows ) + ( -21.895434409766995 )*( os_iOS ) + ( -16.061885064125164 )*( 4g_no ) + ( -20.480130135145345 )*( 4g_yes ) + ( -16.980808473725084 
)*( 5g_no ) + ( -197.4607801779984 )*( 5g_yes )
# Inspect the column dtypes and non-null counts of the statsmodels training design matrix.
# NOTE(review): the recorded output lists 3450 rows and 46 columns (including
# used_price, without const), which does not match the 2415-row frame the OLS
# model was fit on -- it looks like output from a different notebook state;
# re-run to confirm.
X_trainc.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3450 entries, 0 to 3570 Data columns (total 46 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 main_camera_mp 3450 non-null float64 1 selfie_camera_mp 3450 non-null float64 2 int_memory 3450 non-null float64 3 ram 3450 non-null float64 4 release_year 3450 non-null int64 5 days_used 3450 non-null int64 6 new_price 3450 non-null float64 7 used_price 3450 non-null float64 8 brand_name_Acer 3450 non-null uint8 9 brand_name_Alcatel 3450 non-null uint8 10 brand_name_Apple 3450 non-null uint8 11 brand_name_Asus 3450 non-null uint8 12 brand_name_BlackBerry 3450 non-null uint8 13 brand_name_Celkon 3450 non-null uint8 14 brand_name_Coolpad 3450 non-null uint8 15 brand_name_Gionee 3450 non-null uint8 16 brand_name_HTC 3450 non-null uint8 17 brand_name_Honor 3450 non-null uint8 18 brand_name_Huawei 3450 non-null uint8 19 brand_name_Infinix 3450 non-null uint8 20 brand_name_Karbonn 3450 non-null uint8 21 brand_name_LG 3450 non-null uint8 22 brand_name_Lava 3450 non-null uint8 23 brand_name_Lenovo 3450 non-null uint8 24 brand_name_Meizu 3450 non-null uint8 25 brand_name_Micromax 3450 non-null uint8 26 brand_name_Microsoft 3450 non-null uint8 27 brand_name_Motorola 3450 non-null uint8 28 brand_name_Nokia 3450 non-null uint8 29 brand_name_Oppo 3450 non-null uint8 30 brand_name_Others 3450 non-null uint8 31 brand_name_Panasonic 3450 non-null uint8 32 brand_name_Realme 3450 non-null uint8 33 brand_name_Samsung 3450 non-null uint8 34 brand_name_Sony 3450 non-null uint8 35 brand_name_Vivo 3450 non-null uint8 36 brand_name_Xiaomi 3450 non-null uint8 37 brand_name_ZTE 3450 non-null uint8 38 os_Android 3450 non-null uint8 39 os_Others 3450 non-null uint8 40 os_Windows 3450 non-null uint8 41 os_iOS 3450 non-null uint8 42 4g_no 3450 non-null uint8 43 4g_yes 3450 non-null uint8 44 5g_no 3450 non-null uint8 45 5g_yes 3450 non-null uint8 dtypes: float64(6), int64(2), uint8(38) memory usage: 499.6 KB
# Inspect the column dtypes and non-null counts of the held-out (test) design
# matrix, which includes the added const column.
X_testc.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1035 entries, 1473 to 3084 Data columns (total 53 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 const 1035 non-null float64 1 screen_size 1035 non-null float64 2 main_camera_mp 1035 non-null float64 3 selfie_camera_mp 1035 non-null float64 4 int_memory 1035 non-null float64 5 ram 1035 non-null float64 6 battery 1035 non-null float64 7 weight 1035 non-null float64 8 release_year 1035 non-null int64 9 days_used 1035 non-null int64 10 new_price 1035 non-null float64 11 brand_name_Acer 1035 non-null uint8 12 brand_name_Alcatel 1035 non-null uint8 13 brand_name_Apple 1035 non-null uint8 14 brand_name_Asus 1035 non-null uint8 15 brand_name_BlackBerry 1035 non-null uint8 16 brand_name_Celkon 1035 non-null uint8 17 brand_name_Coolpad 1035 non-null uint8 18 brand_name_Gionee 1035 non-null uint8 19 brand_name_Google 1035 non-null uint8 20 brand_name_HTC 1035 non-null uint8 21 brand_name_Honor 1035 non-null uint8 22 brand_name_Huawei 1035 non-null uint8 23 brand_name_Infinix 1035 non-null uint8 24 brand_name_Karbonn 1035 non-null uint8 25 brand_name_LG 1035 non-null uint8 26 brand_name_Lava 1035 non-null uint8 27 brand_name_Lenovo 1035 non-null uint8 28 brand_name_Meizu 1035 non-null uint8 29 brand_name_Micromax 1035 non-null uint8 30 brand_name_Microsoft 1035 non-null uint8 31 brand_name_Motorola 1035 non-null uint8 32 brand_name_Nokia 1035 non-null uint8 33 brand_name_OnePlus 1035 non-null uint8 34 brand_name_Oppo 1035 non-null uint8 35 brand_name_Others 1035 non-null uint8 36 brand_name_Panasonic 1035 non-null uint8 37 brand_name_Realme 1035 non-null uint8 38 brand_name_Samsung 1035 non-null uint8 39 brand_name_Sony 1035 non-null uint8 40 brand_name_Spice 1035 non-null uint8 41 brand_name_Vivo 1035 non-null uint8 42 brand_name_XOLO 1035 non-null uint8 43 brand_name_Xiaomi 1035 non-null uint8 44 brand_name_ZTE 1035 non-null uint8 45 os_Android 1035 non-null uint8 46 os_Others 1035 non-null uint8 
47 os_Windows 1035 non-null uint8 48 os_iOS 1035 non-null uint8 49 4g_no 1035 non-null uint8 50 4g_yes 1035 non-null uint8 51 5g_no 1035 non-null uint8 52 5g_yes 1035 non-null uint8 dtypes: float64(9), int64(2), uint8(42) memory usage: 139.5 KB
Seven variables (screen size, battery, weight, and the Google, OnePlus, Spice, and XOLO brand indicators) affected the regression line of best fit dramatically. Including these variables causes the price predictions to be less accurate on the dataset provided.